# SETUP -------------------------------------------------------------------

# Libraries
library(tidyverse)
library(gsynth)
library(panelView)
library(xtable)
library(patchwork)
library(stargazer)

# Custom functions
source(paste0(here::here(), "/input/funs/funs.R"))

# Data
df <- readRDS(paste0(here::here(), "/input/processed/applications_1997-2024.RDS"))


# DATA PREP ---------------------------------------------------------------

# Splitting up the data in a dataset for each study and performing the
# manipulations described in the manuscript/detailed in the appendix

# NURSE-DATA (STUDY 2)
nurse_df_total <- df |> 
  filter(treatment != "Lærerlockout")

# Alternative imputing: Just using observed median
nurse_df_total <- nurse_df_total |> 
  group_by(uddannelse) |> 
  mutate(
    andel_kvote2_alternativ = if_else(
      is.na(andel_kvote2_prio1),
      median(andel_kvote2_prio1, na.rm = TRUE),
      andel_kvote2_prio1)) |> 
  ungroup()

# Imputing missing values
for (u in unique(nurse_df_total$uddannelse)) {
  nurse_df_total <- nurse_df_total |> 
    lm_impute(missing_var = "andel_kvote2_prio1", rhs = "år", education = u) |> 
    lm_impute(missing_var = "andel_kvote2_total", rhs = "år", education = u)
}

# Calculating number of kvote 2 applications
nurse_df_total <- nurse_df_total |> 
  mutate(
    ansøgninger_prio1_kvote2 = if_else(
      år < 2009,
      ansøgninger_prio1 * andel_kvote2_prio1,
      ansøgninger_prio1_kvote2),
    ansøgninger_prio1_kvote2_alternativ = if_else(
      år < 2009,
      ansøgninger_prio1 * andel_kvote2_alternativ,
      ansøgninger_prio1_kvote2),
    ansøgninger_total_kvote2 = if_else(
      år < 2009,
      ansøgninger_total * andel_kvote2_total,
      ansøgninger_total_kvote2))

# Fixing the name of the education
nurse_df_total <- nurse_df_total |> 
  mutate(uddannelse = if_else(
    treatment == "Sygeplejerskestrejke",
    "Sygeplejerske",
    uddannelse))

# Splitting into kvote 1 and kvote 2 data frames
nurse_df_k1 <- nurse_df_total |> 
  mutate(
    ansøgninger_main = ansøgninger_prio1 - ansøgninger_prio1_kvote2,
    ansøgninger_main_robust = ansøgninger_prio1 - ansøgninger_prio1_kvote2_alternativ,
    ansøgninger_robust_total = ansøgninger_total - ansøgninger_total_kvote2,
    # Fixing name (just for plotting conveniece)
    uddannelse = if_else(
      treatment == "Sygeplejerskestrejke",
      "Sygeplejerske (kvote 1)",
      uddannelse))

nurse_df_k2 <- nurse_df_total |> 
  mutate(
    ansøgninger_main = ansøgninger_prio1_kvote2,
    ansøgninger_main_robust = ansøgninger_prio1_kvote2_alternativ,
    ansøgninger_robust_total = ansøgninger_total_kvote2,
    # Fixing name (just for plotting conveniece)
    uddannelse = if_else(
      treatment == "Sygeplejerskestrejke",
      "Sygeplejerske (kvote 2)",
      uddannelse))

# TEACHER DATA (STUDY 1)
# No grade-data before 2009 so I impute the 2009-value or median
teacher_df <- df |> 
  filter(treatment != "Sygeplejerskestrejke" & år <= 2019) |> 
  group_by(uddannelse) |> 
  mutate(
    karakter_alternativ = if_else(is.na(karakter), median(karakter, na.rm = TRUE), karakter),
    grade_2009 = ifelse(any(år == 2010), karakter[år == 2009], NA),
    karakter = ifelse(år < 2009 & is.na(karakter), grade_2009, karakter)
  ) |> 
  select(-grade_2009) |> 
  ungroup()

# Estimating the share with a grade above 7 by
# assuming a weibull-distribution - fit to match 2016 applications
shape <-  2.2
teacher_df$andel_over7 <- with(
  teacher_df,
  (1 - pweibull(7, shape = shape, scale = (karakter) / gamma(1 + 1 / shape))))

# Also estimating based on the imputed median (for robustness)
shape <-  2.2
teacher_df$andel_over7_alternativ <- with(
  teacher_df,
  (1 - pweibull(7, shape = shape, scale = (karakter_alternativ) / gamma(1 + 1 / shape))))

# Calculating applications with a grade above 7
teacher_df <- teacher_df |> 
  mutate(
    ansøgninger_main = ansøgninger_prio1 * andel_over7,
    ansøgninger_main_robust = ansøgninger_prio1 * andel_over7_alternativ,
    ansøgninger_robust_total = ansøgninger_total * andel_over7,
    ansøgninger_robust_uopdelt = ansøgninger_prio1,
    uddannelse = if_else(
      treatment == "Lærerlockout",
      "Folkeskolelærer",
      uddannelse))


# FIGURE 1: SYNTHETIC CONTROL RESULTS, TEACHERS ---------------------------

# Filter to only donors of "professional bachelor" level
gsk_df <- teacher_df |> 
  filter(uddannelsesniveau == "Prof. bachelor")

# Fit model
fit_teacher_main <- gsynth(
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1", "karakter"),
  data = gsk_df,
  index = c("uddannelse", "år"),
  na.rm = TRUE,
  se = TRUE,
  force = "two-way",
  estimator = "mc",
  min.T0 = 5,
  nboots = 1000,
  seed = 4
)

# Proces results
teacher_main <- report_results(
  fit = fit_teacher_main,
  education_name = "Folkeskolelærer",
  t0 = 2012)

# Make plot
teacher_main[[1]] | teacher_main[[2]]

# Save plot
ggsave(paste0(here::here(),"/output/figure1.png"), width = 10, height = 5.5)



# FIGURE 2: IN-PLACE PLACEBOS, TEACHERS -----------------------------------

# Empty data frame to store results
place_placebos_teacher <- data.frame()

# Only include relevant donors and only complete time series
complete_obs <- teacher_df |> 
  filter(uddannelsesniveau == "Prof. bachelor") |> 
  filter(!is.na(ansøgninger_main)) |> 
  group_by(uddannelse) |> 
  mutate(n = n()) |> 
  ungroup() |> 
  filter(n >= 23)

# Loop over all units and use each unit as treatment-unit
for (u in unique(complete_obs$uddannelse)) {
  
  gsk_df <- teacher_df |> 
    filter(uddannelsesniveau == "Prof. bachelor") |> 
    mutate(d = if_else(
      uddannelse == u & år >= 2013,
      1,
      0))
  
  out_teacher <- gsynth(
    Y = "ansøgninger_main",
    D = "d",
    X = c("andel_kvinder_prio1", "karakter"),
    data = gsk_df,
    index = c("uddannelse", "år"),
    na.rm = TRUE,
    se = TRUE,
    force = "two-way",
    estimator = "mc",
    min.T0 = 5,
    #nboots = 1000,
    seed = 4,
    CV = TRUE)
  
  temp <- data.frame(
    uddannelse = u,
    t = as.vector(out_teacher[["time"]]),
    faktisk = as.vector(out_teacher[["Y.tr"]]),
    sk = as.vector(out_teacher[["Y.ct"]]),
    t0 = 2012)
  
  place_placebos_teacher <- rbind(place_placebos_teacher, temp)
  
}

# Processing results
place_placebos_teacher <- place_placebos_teacher |> 
  mutate(estimate = faktisk - sk,
         d = if_else(
           uddannelse == "Folkeskolelærer",
           "Teachers",
           "Placebo"))

# Make plot
place_placebos_teacher |> 
  ggplot(aes(x = t, y = estimate, group = uddannelse, color = d)) +
  geom_line() +
  xlim(c(1997, 2019)) +
  scale_color_manual(
    values = c(
      "Placebo" = "grey",
      "Teachers" = "black")) +
  theme_bw() +
  labs(x = NULL,
       color = NULL,
       y = "Estimate")

# Save plot
ggsave(paste0(here::here(), "/output/figure2.png"), width = 10, height = 5.5)


# FIGURE 3: SYNTHETIC CONTROL RESULTS, NURSES (QUOTA 1) -------------------

# Filter to only relevant donor units
gsk_df <- nurse_df_k1 |> 
  filter(uddannelsesniveau == "Prof. bachelor")

# Fit model
fit_nurse_k1_main <- gsynth(
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1"),
  data = gsk_df,
  index = c("uddannelse", "år"),
  na.rm = TRUE,
  se = TRUE,
  force = "two-way",
  estimator = "mc",
  min.T0 = 5,
  nboots = 1000,
  seed = 4,
  nlambda = 20
)

# Proces results
nurse_k1_main <- report_results(
  fit = fit_nurse_k1_main,
  education_name = "Sygeplejerske (kvote 1)",
  t0 = 2020)

# Make plot
nurse_k1_main[[1]] | nurse_k1_main[[2]]

# Save plot
ggsave(paste0(here::here(), "/output/figure3.png"), width = 10, height = 5.5)


# FIGURE 4: SYNTHETIC CONTROL RESULTS, NURSES (QUOTA 2) -------------------

# Filter to only relevant donor units
gsk_df <- nurse_df_k2 |> 
  filter(uddannelsesniveau == "Prof. bachelor")

# Fit model
fit_nurse_k2_main <- gsynth(
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1"),
  data = gsk_df,
  index = c("uddannelse", "år"),
  na.rm = TRUE,
  se = TRUE,
  force = "two-way",
  estimator = "mc",
  min.T0 = 5,
  nboots = 1000,
  seed = 4,
  nlambda = 20
)

# Proces results
nurse_k2_main <- report_results(
  fit = fit_nurse_k2_main,
  education_name = "Sygeplejerske (kvote 2)",
  t0 = 2020)

# Make plot
nurse_k2_main[[1]] | nurse_k2_main[[2]]

# Save plot
ggsave(paste0(here::here(), "/output/figure4.png"), width = 10, height = 5.5)


# FIGURE 5: IN-PLACE PLACEBOS, NURSES -------------------------------------

# Make empty data frame to store results
place_placebos_nurse_k1 <- data.frame()

# Filter to only relevant donor units and only complete time series
complete_obs <- nurse_df_k1 |> 
  filter(uddannelsesniveau == "Prof. bachelor") |> 
  filter(!is.na(ansøgninger_main)) |> 
  group_by(uddannelse) |> 
  mutate(n = n()) |> 
  ungroup() |> 
  filter(n >= 28)

# Loop over all units and pretend each is treated
for (u in unique(complete_obs$uddannelse)) {
  
  gsk_df <- nurse_df_k1 |> 
    filter(uddannelsesniveau == "Prof. bachelor") |> 
    mutate(d = if_else(
      uddannelse == u & år >= 2021,
      1,
      0))
  
  out_nurse_k1 <- gsynth(
    Y = "ansøgninger_main",
    D = "d",
    X = c("andel_kvinder_prio1"),
    data = gsk_df,
    index = c("uddannelse", "år"),
    na.rm = TRUE,
    se = TRUE,
    force = "two-way",
    estimator = "mc",
    min.T0 = 5,
    #nboots = 1000,
    seed = 4,
    CV = TRUE)
  
  temp <- data.frame(
    uddannelse = u,
    t = as.vector(out_nurse_k1[["time"]]),
    faktisk = as.vector(out_nurse_k1[["Y.tr"]]),
    sk = as.vector(out_nurse_k1[["Y.ct"]]),
    t0 = 2020)
  
  place_placebos_nurse_k1 <- rbind(place_placebos_nurse_k1, temp)
  
}

# Proces results
place_placebos_nurse_k1 <- place_placebos_nurse_k1 |> 
  mutate(estimate = faktisk - sk,
         d = if_else(
           uddannelse == "Sygeplejerske (kvote 1)",
           "Nurses",
           "Placebo"))

# Make plots
place_placebos_nurse_k1 |> 
  ggplot(aes(x = t, y = estimate, group = uddannelse, color = d)) +
  geom_line() +
  xlim(c(1997, 2024)) +
  scale_color_manual(
    values = c(
      "Placebo" = "grey",
      "Nurses" = "black")) +
  theme_bw() +
  labs(x = NULL,
       color = NULL,
       y = "Estimate")

# Save plot
ggsave(paste0(here::here(), "/output/figure5.png"), width = 10, height = 5.5)
